library(tidyverse)
library(readxl) # for importing raw data
library(haven)

## Load data ####

# Work-from-home lookup table, read from Stata. Later steps use its
# ind_ce_cps, educ_ce_cps, wfh_prop and count columns.
wfh_occ_educ <- read_stata(file = "Raw data/wfh_occ_educ.dta")

# Read the fmli workbooks and stack them into `fmli_wfh`.
# NOTE(review): the file suffixes (202, 203, 204, 211, 212) look like
# year + quarter codes -- confirm against the data documentation.

# Read one fmli workbook and keep only the columns this script uses.
#   path: path to an fmli .xlsx file.
# Returns a tibble with the selected columns, in the order listed; select()
# errors if any of them is missing from the workbook.
read_fmli_wfh <- function(path) {
  read_excel(path) %>%
    select(NEWID, EDUC_REF, STATE, EARNCOMP, EDUCA2, FAM_TYPE, HIGH_EDU,
           INCOMEY1, INCOMEY2, PSU, OCCUCOD1, OCCUCOD2, INTERI, FINCBTAX)
}

fmli202_wfh <- read_fmli_wfh("Raw data/fmli202.xlsx")
fmli203_wfh <- read_fmli_wfh("Raw data/fmli203.xlsx")
fmli204_wfh <- read_fmli_wfh("Raw data/fmli204.xlsx")
fmli211_wfh <- read_fmli_wfh("Raw data/fmli211.xlsx")
fmli212_wfh <- read_fmli_wfh("Raw data/fmli212.xlsx")

# Every piece has the same columns after the select, so a plain row-bind
# stacks them in file order.
fmli_wfh <- rbind(fmli202_wfh, fmli203_wfh, fmli204_wfh, fmli211_wfh, fmli212_wfh)

# Read the member (memi) workbooks and stack them into `memi_wfh`.

# Read one memi workbook, upper-case its column names, and keep only the
# columns this script uses.
#   path: path to a memi .xlsx file.
# Returns a tibble with the selected columns, in the order listed; select()
# errors if any of them is missing after the names are upper-cased.
read_memi_wfh <- function(path) {
  read_excel(path) %>%
    # Upper-case the headers before selecting, so the selection does not
    # depend on the header case used in a given workbook.
    rename_all(toupper) %>%
    select(NEWID, EARNER, EARNTYPE, EDUCA, IN_COLL, INC_HRSQ, INCOMEY, WKSTATUS,
           OCCUCODE, CU_CODE, SALARYB, SALARYBX, SALARYX, SALARYXI, AGE, INCNONWK)
}

memi202_wfh <- read_memi_wfh("Raw data/memi202.xlsx")
memi203_wfh <- read_memi_wfh("Raw data/memi203.xlsx")
memi204_wfh <- read_memi_wfh("Raw data/memi204.xlsx")
memi211_wfh <- read_memi_wfh("Raw data/memi211.xlsx")
memi212_wfh <- read_memi_wfh("Raw data/memi212.xlsx")

# Every piece has the same columns after the select, so a plain row-bind
# stacks them in file order.
memi_wfh <- rbind(memi202_wfh, memi203_wfh, memi204_wfh, memi211_wfh, memi212_wfh)


## Industry-based measure ####
# Reshape the member records to one wide row per NEWID: columns suffixed _1
# hold the reference person's values, columns suffixed _2 the spouse/partner's.
memi_wfh_ind <- memi_wfh %>%
  select(-c(SALARYB, SALARYBX, SALARYX, SALARYXI)) %>%
  # Keep only the reference person (CU_CODE 1) and spouses/partners
  # (CU_CODE 0 or 2).
  filter(CU_CODE %in% c(1, 0, 2)) %>%
  # ifelse() is vectorised over the whole column, so no rowwise() is needed;
  # the values produced are the same, without per-row evaluation.
  mutate(
    # Education code 1 stays 1; every other code is shifted down by one.
    educ_ce_cps = ifelse(EDUCA == 1, 1, EDUCA - 1),
    # NOTE(review): the join key is named ind_* but is filled from OCCUCODE;
    # confirm this matches the coding of ind_ce_cps in wfh_occ_educ.
    ind_ce_cps = OCCUCODE,
    # Reference person is person 1; spouse/partner is person 2.
    pernum = ifelse(CU_CODE == 1, "1", "2")
  ) %>%
  # NOTE(review): CU_CODE 0 and 2 both map to person "2". If one NEWID ever
  # has both, pivot_wider() will emit list-columns with a warning -- verify
  # that this does not occur in the data.
  pivot_wider(
    id_cols = NEWID,
    names_from = pernum,
    values_from = c(EARNER, EARNTYPE, EDUCA, IN_COLL, INC_HRSQ, INCOMEY, OCCUCODE, CU_CODE, WKSTATUS,
                    AGE, INCNONWK, educ_ce_cps, ind_ce_cps))


# Combine the family records with the wide member records (keeping NEWIDs
# that appear in either table), derive ID as floor(NEWID / 10), and keep one
# row per ID: the row that sorts first on INTERI.
wfh_ind <- full_join(fmli_wfh, memi_wfh_ind, by = "NEWID") %>%
  mutate(ID = floor(NEWID / 10)) %>%
  arrange(ID, INTERI) %>%
  group_by(ID) %>%
  slice(1) %>%
  ungroup()

# Construct the household-level WFH measure.
# Step 1: look up wfh_prop for each person by (ind_ce_cps, educ_ce_cps).
# Step 2: combine the two persons' values using the ordered rules below.

# The lookup without its count column, renamed once per person so each join
# adds its own wfh_prop_<n> column.
wfh_lookup <- wfh_occ_educ %>%
  select(-count)

wfh_lookup_1 <- wfh_lookup %>%
  rename(ind_ce_cps_1 = ind_ce_cps,
         educ_ce_cps_1 = educ_ce_cps,
         wfh_prop_1 = wfh_prop)

wfh_lookup_2 <- wfh_lookup %>%
  rename(ind_ce_cps_2 = ind_ce_cps,
         educ_ce_cps_2 = educ_ce_cps,
         wfh_prop_2 = wfh_prop)

wfh_ind <- wfh_ind %>%
  left_join(wfh_lookup_1, by = c("ind_ce_cps_1", "educ_ce_cps_1")) %>%
  left_join(wfh_lookup_2, by = c("ind_ce_cps_2", "educ_ce_cps_2")) %>%
  mutate(
    # Temporary flags; they are dropped by the select() further down.
    has_wkstatus_1 = !is.na(WKSTATUS_1),
    has_wkstatus_2 = !is.na(WKSTATUS_2),
    has_prop_1 = !is.na(wfh_prop_1),
    has_prop_2 = !is.na(wfh_prop_2),
    # NOTE(review): INCNONWK codes 1 and 4 are the only non-work reasons that
    # are assigned wfh = 1 -- confirm their meaning in the codebook.
    nonwork_1_or_4_p1 = INCNONWK_1 == 1 | INCNONWK_1 == 4,
    nonwork_1_or_4_p2 = INCNONWK_2 == 1 | INCNONWK_2 == 4,
    # No information at all on a second person.
    no_person_2 = is.na(EARNER_2) & is.na(WKSTATUS_2) & is.na(INCNONWK_2),
    # Rules are tried top to bottom; the first one that is TRUE wins, and
    # rows matching none of them get NA.
    wfh = case_when(
      # Both persons have a work status and a matched proportion: average.
      has_wkstatus_1 & has_wkstatus_2 & has_prop_1 & has_prop_2 ~
        (wfh_prop_1 + wfh_prop_2) / 2,
      # Person 2 reports a non-work reason; person 1 has a matched proportion.
      !is.na(INCNONWK_2) & has_wkstatus_1 & has_prop_1 ~ wfh_prop_1,
      # Person 1 reports a non-work reason; person 2 has a matched proportion.
      !is.na(INCNONWK_1) & has_wkstatus_2 & has_prop_2 ~ wfh_prop_2,
      # Both persons report non-work reason 1 or 4.
      nonwork_1_or_4_p1 & nonwork_1_or_4_p2 ~ 1,
      # No second person; person 1 has a matched proportion.
      no_person_2 & has_prop_1 & has_wkstatus_1 ~ wfh_prop_1,
      # No second person; person 1 reports non-work reason 1 or 4.
      no_person_2 & nonwork_1_or_4_p1 ~ 1
    )
  ) %>%
  arrange(ID) %>%
  select(ID, wfh) %>%
  filter(!is.na(wfh))

# Save the ID-level WFH measure (columns ID and wfh) without row names.
write.csv(x = wfh_ind, file = "wfh_ind.csv", row.names = FALSE)

